{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import re\n",
    "import numpy as np\n",
    "import datetime as dt\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def index_dict(d,excerpt):\n",
    "    '''indexes every array in the dict and returns new dict'''\n",
    "    for k,v in d.iteritems():\n",
    "        d[k] = v[excerpt]\n",
    "    return d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def vstack_dicts(d1,d2):\n",
    "    '''vstacks two dicts'''\n",
    "    assert d1.keys() == d2.keys(), \"can't combine two dicts with diff keys!\"\n",
    "    for k,v in d1.iteritems():\n",
    "        d1[k] = np.vstack((v,d2[k]))\n",
    "    \n",
    "    return d1\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def dict_element_len(d):\n",
    "    '''gets the length of a random element in the dict'''\n",
    "    return d[d.keys()[0]].shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def normalize(arr,min_=None, max_=None, axis=(0,2,3)):\n",
    "        if min_ is None or max_ is None:\n",
    "            min_ = arr.min(axis=(0,2,3), keepdims=True)\n",
    "\n",
    "            max_ = arr.max(axis=(0,2,3), keepdims=True)\n",
    "\n",
    "        midrange = (max_ + min_) / 2.\n",
    "\n",
    "        range_ = (max_ - min_) / 2.\n",
    "        \n",
    "        arr -= midrange\n",
    "\n",
    "        arr /= (range_)\n",
    "        return arr, min_, max_   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "\n",
    "def convert_bbox_minmax_to_cent_xywh(bboxes):\n",
    "    #current bbox set up is xmin,ymin,xmax,ymax\n",
    "    xmin, xmax,ymin,  ymax = [ bboxes[:,i] for i in range(4) ]\n",
    "    \n",
    "    w = xmax - xmin\n",
    "    h = ymax - ymin\n",
    "\n",
    "    x_c = xmin + w / 2.\n",
    "    y_c = ymin + h / 2.\n",
    "    \n",
    "    \n",
    "    bboxes[:,0] = x_c\n",
    "    bboxes[:,1] = y_c\n",
    "    bboxes[:,2] = w # w\n",
    "    bboxes[:,3] = h #h\n",
    "    return bboxes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_timestamp(filename):\n",
    "    #print filename\n",
    "    rpyear = re.compile(r\"(\\.h2\\.)(.*?)(-)\")\n",
    "    rpdaymonth = re.compile(r\"(-)(.*?)(\\d{5}\\.)\")\n",
    "    year=int(rpyear.search(filename).groups()[1])\n",
    "    tmp=rpdaymonth.search(filename).groups()[1].split('-')\n",
    "    month=int(tmp[0])\n",
    "    day=int(tmp[1])\n",
    "    return dt.date(year,month,day)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_camfiles(data_dir, years):\n",
    "    '''List the ``cam5_*.nc`` files in ``data_dir`` dated in one of ``years``.\n",
    "\n",
    "    Only bare file names (as returned by os.listdir) are returned, sorted\n",
    "    alphabetically. The year of each file is taken from get_timestamp().\n",
    "    '''\n",
    "    cam_pattern = re.compile(r\"^cam5_.*\\.nc$\")\n",
    "    # Filter by name first so get_timestamp() only ever sees cam5 files.\n",
    "    candidates = [name for name in os.listdir(data_dir) if cam_pattern.match(name)]\n",
    "\n",
    "    selected = []\n",
    "    for name in candidates:\n",
    "        if get_timestamp(name).year in years:\n",
    "            selected.append(name)\n",
    "    return sorted(selected)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def interleave_variables(labelled_vars, kwargs, dim=2):\n",
    "    #get some metadata\n",
    "    n_tot_frames = sum([v.shape[0] for v in labelled_vars])\n",
    "    xdim = labelled_vars[0].shape[1]\n",
    "    ydim = labelled_vars[0].shape[2]\n",
    "    time_steps = labelled_vars[0].shape[0]\n",
    "    nvar = len(labelled_vars)\n",
    "    \n",
    "    def interleave_2d(labelled_vars):\n",
    "        #interleave each variable together\n",
    "        #tmp after this should be len(filenames)*4*nvar,768,1152\n",
    "        #nvar = 16 usually\n",
    "        tmp=np.empty((n_tot_frames,xdim,ydim ))\n",
    "        for i in range(nvar):\n",
    "            tmp[i::nvar,:] = labelled_vars[i]\n",
    "\n",
    "        #now make tmp len(filenames)*4, 16, 768,1152 array\n",
    "        tmp=tmp.reshape((time_steps, nvar, xdim, ydim))\n",
    "        return tmp\n",
    "        \n",
    "    def interleave_3d(labelled_vars, time_steps_per_example):\n",
    "        #interleaves each example in 3D fashion ,so takes k frames from each variable and\n",
    "        #concatenates them where k is time_steps_per_example\n",
    "        \n",
    "        num_ex = time_steps / time_steps_per_example\n",
    "        \n",
    "        tmp=np.empty((num_ex, nvar, time_steps_per_example, xdim, ydim))\n",
    "        for ex_ind in range(num_ex):\n",
    "            for var_ind in range(nvar):\n",
    "                tmp[ex_ind, var_ind,:time_steps_per_example,:] = labelled_vars[var_ind][ex_ind:ex_ind \n",
    "                                                                                        + time_steps_per_example]\n",
    "\n",
    "        return tmp\n",
    "    \n",
    "    if kwargs[\"im_dim\"] == 3:\n",
    "        return interleave_3d(labelled_vars, kwargs[\"3d_time_steps_per_example\"])\n",
    "    else:\n",
    "        return interleave_2d(labelled_vars)\n",
    "        \n",
    "        \n",
    "\n",
    "   \n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def convert_nc_data_to_tensor(dataset,kwargs):\n",
    "    '''Read the requested variables from ``dataset`` and merge them into one tensor.\n",
    "\n",
    "    Every name in ``kwargs[\"variables\"]`` is loaded in full from\n",
    "    ``dataset.variables``, subsampled along its first axis with a step of\n",
    "    ``kwargs[\"time_step_sample_frequency\"]``, and the results are passed to\n",
    "    interleave_variables() together with ``kwargs``.\n",
    "    '''\n",
    "    # Load every requested variable completely first.\n",
    "    full_series = []\n",
    "    for var_name in kwargs[\"variables\"]:\n",
    "        full_series.append(dataset.variables[var_name][:])\n",
    "\n",
    "    # Keep only every n-th time step.\n",
    "    # (presumably because only those steps carry labels - verify with the data)\n",
    "    labelled_vars = []\n",
    "    for series in full_series:\n",
    "        labelled_vars.append(series[::kwargs[\"time_step_sample_frequency\"]])\n",
    "\n",
    "    return interleave_variables(labelled_vars, kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "if __name__ == \"__main__\":\n",
    "    # Small manual check of the dict helpers on plain lists.\n",
    "\n",
    "    a ={\"a\":[2,3,4,5],\"b\":[3,4,5,6]}\n",
    "    # b is only needed by the disabled vstack_dicts line below.\n",
    "    b = {\"a\":[1],\"b\":[3]}\n",
    "\n",
    "    # vstack_dicts check, currently disabled:\n",
    "    #c=vstack_dicts(a,b)\n",
    "    # index_dict updates `a` in place and returns it, so `d` is the same dict as `a`.\n",
    "    d = index_dict(a, slice(1,3,1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
